您的訂閱是我製作影片的動力
訂閱點這裡~
影片程式碼(延續昨天)
# Step 3: modelling & diagnostics
# Random sampling: seeded 70/30 train/test split of group_1
# The target must be a factor so that randomForest treats this as classification.
group_1[["default.payment.next.month"]] <- as.factor(
  group_1[["default.payment.next.month"]]
)
n <- nrow(group_1)
set.seed(1117)
# Row indices of the training set (70% of the rows, rounded).
random <- sample.int(n, size = round(0.7 * n))
traindata <- group_1[random, ]
testdata <- group_1[-random, ]
# Predictor columns: every column except the target.
features <- setdiff(names(traindata), "default.payment.next.month")
# Random forest: tune mtry, fit the model, then evaluate on the hold-out set
library(MLmetrics)
library(randomForest)
set.seed(123)
# tuneRF() searches over mtry by OOB error. Keep its result (a matrix with
# columns "mtry" and "OOBError") instead of discarding it, so that the fitted
# model actually uses the tuned value rather than a hand-edited constant.
tune_result <- tuneRF(
  x = traindata[features], y = traindata$default.payment.next.month,
  mtryStart = 1, ntreeTry = 500
)
tune_result
# mtry with the lowest OOB error (ties resolve to the first row).
best_mtry <- unname(tune_result[which.min(tune_result[, "OOBError"]), "mtry"])
best_mtry
rf_model <- randomForest(
  default.payment.next.month ~ .,
  data = traindata,
  ntree = 500, mtry = best_mtry,
  do.trace = 100, na.action = na.roughfix
)
# NOTE(review): na.roughfix is only applied while fitting; confirm testdata has
# no missing predictors, otherwise the predictions below may contain NA.
rf_future <- predict(rf_model, testdata)
rf_future <- as.data.frame(rf_future)
# Put the predictions next to the true labels and predictors.
rf_final <- cbind(rf_future, testdata)
# Confusion matrix: rows = actual class, columns = predicted class.
confusion <- table(rf_final$default.payment.next.month, rf_final$rf_future, dnn = c("實際", "預測"))
accuracy <- sum(diag(confusion)) / sum(confusion)
accuracy
# NOTE(review): with positive = NULL, MLmetrics appears to treat the first
# factor level as the positive class; confirm that is the class you intend to
# score (e.g. pass positive = "1" if the default class is the one of interest).
F1_Score(rf_final$default.payment.next.month, rf_final$rf_future, positive = NULL)
#組一: mtry=4,acc=80%,f1=0.87
#組二: mtry=4,acc=85%,f1=0.91
#組三: mtry=1,acc=82%,f1=0.9
#全丟: mtry=1,acc=82%,f1=0.9
若內容有誤,還請留言指正,謝謝您的指教